* First positional argument: project root directory.
global root_dir = "`1'"

* Load the project configuration (presumably defines log_dir, dataset_dir and
* commondata_dir, which are used below -- confirm in config.do).
include "$root_dir/code/config/config.do"


* Open a named log; the path is quoted so that a log_dir containing spaces works.
* -cap noi- shows any error from -log- without aborting the script.
cap noi log using "${log_dir}/adjusted_citations.log", replace name(dat)

* Positional arguments 2-5 are copied into globals arg1-arg4. The placeholder
* ___EMPTY___ is translated into an empty string ("argument not supplied").
forvalues slot = 1/4 {
    local position = `slot' + 1
    global arg`slot' = cond("``position''" == "___EMPTY___", "", "``position''")
}

* Each setting is overridden only when the caller supplied a non-empty value;
* otherwise whatever value the global already holds is left untouched.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* wtype is echoed unconditionally (prints an empty line when it is not set)
display "${wtype}"
capture noi {

* adjusted_citations.do
* v7
* This do-file builds citation counts normalized by technological field and year of application. 


* --------------------------------------------------------- *
* Define normalize program
* --------------------------------------------------------- *

capture program drop poissreg_normalize
program poissreg_normalize
	* Normalizes citation counts within one year.
	* Called below through -runby ..., by(year)-. The year is read from the
	* first observation in memory.
	* For each outcome a Poisson regression (ppmlhdfe) on the authweight* and
	* fieldweight* variables is fitted, and <outcome>_norm is created as the
	* outcome divided by exp(linear prediction).
	local yr = year[1]
	display "Year: `yr'"
	local outcomes cit_5yrs
	foreach outcome of local outcomes {
		quietly ppmlhdfe `outcome' authweight* fieldweight* if year == `yr'
		* linear index; exponentiated below to get the fitted count
		predict pred, xb
		generate `outcome'_norm = `outcome' / exp(pred)
		drop pred
	}
end

* --------------------------------------------------------- *
* Create a weighted docdb_family -> techn_field_nr mapping
* --------------------------------------------------------- *
 
* prepare applications and authority
* (Orbis patents are loaded as in depvar.do, to reduce the data size a bit while staying general)
* File paths are quoted so that directory globals containing spaces work.
use appln_id ipr_type using "${dataset_dir}/patstat_orbis/Orbis_patents_list_2017_merged.dta", clear

* restrict to the relevant intellectual property type (ipr_type == 2)
keep if ipr_type==2
drop ipr_type

* one row per appln_id (only appln_id is left at this point)
duplicates drop 

* merge in the patent authority; applications without a match are kept
mmerge appln_id using "${commondata_dir}/patstat_2018b/appln_info.dta", unmatched(master) ukeep(appln_auth) 
drop _merge 

* count occurrences of each authority and convert to a share of all rows
bysort appln_auth: egen count = count(appln_auth)
gen freq = count/_N

* simplify a bit: pool authorities with a share below 0.1% into "OTHER"
replace appln_auth = "OTHER" if freq < 0.001
drop count freq

* one indicator column per authority (authweight_<AUTH>), one row per application
gen authweight_ = 1
reshape wide authweight_, i(appln_id) j(appln_auth) string

* prepare technical fields
* Applications without a match in the technical-field file are kept
* (unmatched(master)) and end up with missing techn_field_nr and weight.
mmerge appln_id using "${commondata_dir}/patstat_2018b/appln_id_techn_field.dta", unmatched(master) ukeep(weight techn_field_nr)
drop _merge

* Zero the weight BEFORE recoding the field number: once techn_field_nr has
* been set to 999, missing(techn_field_nr) is never true and the weight
* replacement would silently do nothing.
replace weight = 0 if missing(techn_field_nr)
replace techn_field_nr = 999 if missing(techn_field_nr)
* NOTE(review): with weight 0 the placeholder field 999 carries no mass --
* confirm that this (rather than weight 1) is the intended treatment.

* one column per technical field (fieldweight_<nr>), one row per application
ren weight fieldweight_
reshape wide fieldweight_, i(appln_id) j(techn_field_nr)

* aggregate to families, renormalize auth + family weights
* (path quoted so that a commondata_dir containing spaces works)
mmerge appln_id using "${commondata_dir}/patstat_2018b/family_info.dta", unmatched(master) ukeep(docdb_family_id fam_earliest_appln_year)
drop _merge
ren fam_earliest_appln_year year

* make the data size more manageable
* (missing years are dropped as well, since missing compares greater than 2018)
keep if year >= 1991 & year <= 2018

* sum the application-level weights within each family-year
collapse (sum) fieldweight_* authweight_*, by(docdb_family_id year)

* The authority indicators were 1 per application, so their row total is the
* number of applications collapsed into the family; dividing by it turns the
* sums into per-family averages.
egen appln_per_fam = rowtotal(authweight_*)
foreach var of varlist fieldweight_* authweight_* {
	replace `var' = `var' / appln_per_fam
}
drop appln_per_fam

* merge citations; families without a citation record are kept (cit_5yrs missing)
mmerge docdb_family_id using "${commondata_dir}/patstat_2018b/citations_by_docdb_id_exclself.dta", unmatched(master) ukeep(cit_5yrs)
* year 9999 is already excluded by the 1991-2018 filter applied before the
* collapse; this line is kept only as a guard in case that filter is relaxed.
drop if year == 9999
drop _merge

* run normalization Poisson regressions by year
* (runby calls poissreg_normalize once per year group)
runby poissreg_normalize, by(year) verbose status

* save
* The family identifier is spelled out in full so that the -keep- does not
* depend on variable abbreviation being enabled.
keep docdb_family_id cit_5yrs_norm
save "${dataset_dir}/patstat_orbis/citations_by_docdb_id_exclself_normalized.dta", replace

}
* Report the outcome of the captured block above, then close the named log.
* _rc still holds the return code of the preceding -capture noi { }- block.
local run_rc = _rc
if `run_rc' != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

capture log close dat